library(tidyverse)
## ── Attaching packages ───────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Read the survey records that the plots in this script are built from.
surveys_complete <- read_csv(file = "data/surveys_complete.csv")
## Parsed with column specification:
## cols(
##   record_id = col_double(),
##   month = col_double(),
##   day = col_double(),
##   year = col_double(),
##   plot_id = col_double(),
##   species_id = col_character(),
##   sex = col_character(),
##   hindfoot_length = col_double(),
##   weight = col_double(),
##   genus = col_character(),
##   species = col_character(),
##   taxa = col_character(),
##   plot_type = col_character()
## )

###Plotting with ggplot2

# Template: ggplot(data = <DATA>, mapping = aes(<MAPPINGS>)) + <GEOM_FUNCTION>()

# Step 1: bind the plot to a specific data frame.
ggplot(surveys_complete)

# Step 2: define the aesthetic mapping (which columns go on x and y).
ggplot(surveys_complete, aes(weight, hindfoot_length))

# Step 3: add a geom with `+` to draw the observations as points.
ggplot(surveys_complete, aes(weight, hindfoot_length)) +
  geom_point()

# Alternative: store the base plot in an object and add layers to it later.

# Base plot: data and axis mapping only, no geom yet
surveys_plot <- ggplot(surveys_complete, aes(weight, hindfoot_length))

# Draw it with a point layer
surveys_plot + geom_point()

# Correct layering syntax: the `+` sits at the end of the preceding line
surveys_plot + geom_point()

# Incorrect: when the `+` starts a new line the layer is not added and R
# returns an error message
# surveys_plot
#   + geom_point()

###CHALLENGE Q1

## Hexagonal binning with hexbin from CRAN:

# Install hexbin only when it is not already available, so that re-running
# this script does not download and reinstall the package every time.
if (!requireNamespace("hexbin", quietly = TRUE)) {
  install.packages("hexbin")
}
library("hexbin")

## geom_hex(): draw the weight / hindfoot_length plot as hexagonal bins
## instead of individual points.

surveys_plot +
  geom_hex()

###Building plots iteratively

# Starting point: plain scatter plot of hindfoot length against weight.
ggplot(surveys_complete, aes(weight, hindfoot_length)) +
  geom_point()

# Make the points transparent (alpha) to reduce overplotting.
ggplot(surveys_complete, aes(weight, hindfoot_length)) +
  geom_point(alpha = 0.1)

# Give every point the same fixed colour.
ggplot(surveys_complete, aes(weight, hindfoot_length)) +
  geom_point(colour = "blue", alpha = 0.1)

# Colour the points by a column instead (here species_id); because the colour
# comes from the data it has to be mapped inside aes().
ggplot(surveys_complete, aes(weight, hindfoot_length)) +
  geom_point(aes(colour = species_id), alpha = 0.1)

###CHALLENGE Q2

# Task: create a scatter plot of weight over species_id with the plot types
# shown in different colours. Is this a good way to show this type of data?

# Answer:
ggplot(surveys_complete, aes(species_id, weight)) +
  geom_point(aes(colour = plot_type))

# Not the best way to show this type of data: there is a lot of overlap
# between the points.

###Boxplot

# Distribution of weight within each species as a boxplot.
ggplot(surveys_complete, aes(species_id, weight)) +
  geom_boxplot()

# Add the individual measurements as jittered points on top of transparent
# boxes (alpha = 0) to give a better idea of how many observations there are.
ggplot(surveys_complete, aes(species_id, weight)) +
  geom_boxplot(alpha = 0) +
  geom_jitter(colour = "tomato", alpha = 0.3)

# Layers are drawn in the order they are added, so adding the boxplot last
# brings it in front of the points.
ggplot(surveys_complete, aes(species_id, weight)) +
  geom_jitter(colour = "tomato", alpha = 0.3) +
  geom_boxplot(alpha = 0)

###CHALLENGE Q3

# Boxplots are useful summaries, but they hide the shape of the distribution:
# a bimodal distribution, for example, would not be visible in a boxplot. An
# alternative is the violin plot (sometimes known as a beanplot), which draws
# the shape of the density of points.
# Task: replace the box plot with a violin plot; see geom_violin().

ggplot(surveys_complete, aes(species_id, weight)) +
  geom_violin()

# The scale of the observations often matters: changing the scale of an axis
# can distribute the observations better over the plot area. Scales are
# changed like any other component, by adding another command to the plot.
# Task: represent weight on the log10 scale; see scale_y_log10().

ggplot(surveys_complete, aes(species_id, weight)) +
  geom_violin() +
  scale_y_log10()

# So far we have looked at the distribution of weight within species. Now
# explore the distribution of another variable within each species:
#   * create a boxplot for hindfoot_length;
#   * overlay the boxplot layer on a jitter layer to show actual measurements;
#   * colour the data points according to the plot from which the sample was
#     taken (plot_id).
#
# plot_id is read in as a number, so mapping it to colour directly would give
# a continuous colour gradient; as.factor() gives each plot its own discrete
# colour. The jitter layer is added first so the boxplot is drawn over it, and
# alpha = 0 keeps the boxes transparent so the points stay visible.

ggplot(data = surveys_complete,
       mapping = aes(x = species_id, y = hindfoot_length)) +
  geom_jitter(alpha = 0.1, aes(color = as.factor(plot_id))) +
  geom_boxplot(alpha = 0)

###Plotting Time Series Data

# Number of records per year for each genus (count() stores it in column n).
yearly_counts <- count(surveys_complete, year, genus)

# Time series as a line plot; without grouping, all genera share one line.
ggplot(yearly_counts, aes(year, n)) +
  geom_line()

# Use the group aesthetic to draw a separate line for each genus.
ggplot(yearly_counts, aes(year, n, group = genus)) +
  geom_line()

# Mapping colour to genus groups the data automatically as well.
ggplot(yearly_counts, aes(year, n, colour = genus)) +
  geom_line()

###Integrating the pipe operator with ggplot2

# The pipe can supply the data argument of ggplot(); layers are still added
# with `+` afterwards.
yearly_counts %>%
  ggplot(aes(year, n, colour = genus)) +
  geom_line()

# Data manipulation and visualisation linked in a single chain, with the
# resulting plot stored in an object.
yearly_counts_graph <- surveys_complete %>%
  count(year, genus) %>%
  ggplot(aes(year, n, colour = genus)) +
  geom_line()

# Display the stored plot.
yearly_counts_graph

###Faceting

# Split one plot into several panels: one time series per genus.
ggplot(yearly_counts, aes(year, n)) +
  geom_line() +
  facet_wrap(vars(genus))

# Count by sex as well, so the line in each panel can be split by sex.
yearly_sex_counts <- count(surveys_complete, year, genus, sex)

# One panel per genus, with sex distinguished by colour.
ggplot(yearly_sex_counts, aes(year, n, colour = sex)) +
  geom_line() +
  facet_wrap(vars(genus))

# Grid of panels: sex in rows, genus in columns.
ggplot(yearly_sex_counts, aes(year, n, colour = sex)) +
  geom_line() +
  facet_grid(vars(sex), vars(genus))

# Panels can also be organised by rows only or by columns only.

# Single column: one row of the grid per genus
ggplot(yearly_sex_counts, aes(year, n, colour = sex)) +
  geom_line() +
  facet_grid(vars(genus))

# Single row: one column of the grid per genus
ggplot(yearly_sex_counts, aes(year, n, colour = sex)) +
  geom_line() +
  facet_grid(cols = vars(genus))

###ggplot2 Themes

# Customise the look of the graph by adding a complete theme (theme_bw()).
ggplot(yearly_sex_counts, aes(year, n, colour = sex)) +
  geom_line() +
  facet_wrap(vars(genus)) +
  theme_bw()

###CHALLENGE Q4

# Create a plot that depicts how the average weight of each species changes
# through the years.

# Mean weight per year and species. `.groups = "drop"` returns an ungrouped
# tibble, so no grouping by year is left behind on the result and summarise()
# no longer prints its "regrouping output" message.
# NOTE(review): mean(weight) yields NA for any group containing a missing
# weight; presumably surveys_complete has no missing weights - confirm.
yearly_weight <- surveys_complete %>%
  group_by(year, species_id) %>%
  summarize(avg_weight = mean(weight), .groups = "drop")

# One panel per species, average weight against year.
ggplot(data = yearly_weight, mapping = aes(x = year, y = avg_weight)) +
  geom_line() +
  facet_wrap(vars(species_id)) +
  theme_bw()

###Customization

# Give the axes informative names and add a title to the figure.
ggplot(yearly_sex_counts, aes(year, n, colour = sex)) +
  geom_line() +
  facet_wrap(vars(genus)) +
  labs(
    x = "Year of observation",
    y = "Number of individuals",
    title = "Observed genera through time"
  ) +
  theme_bw()

# Increase the font size of all text through theme().
ggplot(yearly_sex_counts, aes(year, n, colour = sex)) +
  geom_line() +
  facet_wrap(vars(genus)) +
  labs(
    x = "Year of observation",
    y = "Number of individuals",
    title = "Observed genera through time"
  ) +
  theme_bw() +
  theme(text = element_text(size = 16))

# Change the orientation of the x-axis labels (angle = 90), restyle the axis
# text and set the facet strip labels in italics.
ggplot(yearly_sex_counts, aes(year, n, colour = sex)) +
  geom_line() +
  facet_wrap(vars(genus)) +
  labs(
    x = "Year of observation",
    y = "Number of individuals",
    title = "Observed genera through time"
  ) +
  theme_bw() +
  theme(
    text = element_text(size = 16),
    strip.text = element_text(face = "italic"),
    axis.text.x = element_text(
      colour = "grey20", size = 12,
      angle = 90, hjust = 0.5, vjust = 0.5
    ),
    axis.text.y = element_text(colour = "grey20", size = 12)
  )

# Store the theme settings in an object so they can be applied to other plots.
grey_theme <- theme(
  text = element_text(size = 16),
  axis.text.x = element_text(
    colour = "grey20", size = 12,
    angle = 90, hjust = 0.5, vjust = 0.5
  ),
  axis.text.y = element_text(colour = "grey20", size = 12)
)

# Apply the stored theme to a boxplot of hindfoot length per species.
ggplot(surveys_complete, aes(species_id, hindfoot_length)) +
  geom_boxplot() +
  grey_theme

###CHALLENGE Q5

# Tasks: see if you can change the thickness of the lines. Can you find a way
# to change the name of the legend? What about its labels? Try using a
# different color palette.

# Boxplot of hindfoot length per species, coloured by species, with the shared
# grey_theme, custom title and axis labels, and the legend at the bottom.
# Every component must be joined with `+`: `%>%` binds more tightly than `+`,
# so piping grey_theme into labs() hands the theme to labs() as an argument
# instead of applying it to the plot.
ggplot(surveys_complete,
       aes(x = species_id, y = hindfoot_length, color = species_id)) +
  geom_boxplot() +
  grey_theme +
  labs(title = "Hindfoot Length of Each Species",
       x = "Species",
       y = "Hindfoot Length") +
  theme(legend.position = "bottom")